<!DOCTYPE html>
<html lang="zh-CN">
    <head>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta name="robots" content="noodp" />
        <title>大模型入门教学 - 大模型系统</title><meta name="Description" content="大模型入门教学"><meta property="og:title" content="大模型入门教学" />
<meta property="og:description" content="大模型入门教学" />
<meta property="og:type" content="article" />
<meta property="og:url" content="https://kha-zix-1.gitee.io/llm-blog/get-started/" /><meta property="og:image" content="https://kha-zix-1.gitee.io/llm-blog/get-started/featured-image.jpg" /><meta property="article:section" content="posts" />
<meta property="article:published_time" content="2024-03-31T21:29:01+08:00" />
<meta property="article:modified_time" content="2024-04-02T09:35:38+08:00" /><meta property="og:site_name" content="大模型系统" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:image" content="https://kha-zix-1.gitee.io/llm-blog/get-started/featured-image.jpg" /><meta name="twitter:title" content="大模型入门教学"/>
<meta name="twitter:description" content="大模型入门教学"/>
      <meta name="twitter:site" content="@xxxx"/>
<meta name="application-name" content="大模型系统">
<meta name="apple-mobile-web-app-title" content="大模型系统"><meta name="theme-color" content="#ffffff"><meta name="msapplication-TileColor" content="#da532c"><link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
        <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
        <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png"><link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png"><link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5"><link rel="manifest" href="/site.webmanifest"><link rel="canonical" href="https://kha-zix-1.gitee.io/llm-blog/get-started/" /><link rel="stylesheet" href="/llm-blog/css/style.min.css"><link rel="preload" href="/llm-blog/lib/fontawesome-free/all.min.css" as="style" onload="this.onload=null;this.rel='stylesheet'">
        <noscript><link rel="stylesheet" href="/llm-blog/lib/fontawesome-free/all.min.css"></noscript><link rel="preload" href="/llm-blog/lib/animate/animate.min.css" as="style" onload="this.onload=null;this.rel='stylesheet'">
        <noscript><link rel="stylesheet" href="/llm-blog/lib/animate/animate.min.css"></noscript><script type="application/ld+json">
    {
        "@context": "http://schema.org",
        "@type": "BlogPosting",
        "headline": "大模型入门教学",
        "inLanguage": "zh-CN",
        "mainEntityOfPage": {
            "@type": "WebPage",
            "@id": "https:\/\/kha-zix-1.gitee.io\/llm-blog\/get-started\/"
        },"image": [{
                            "@type": "ImageObject",
                            "url": "https:\/\/kha-zix-1.gitee.io\/llm-blog\/get-started\/featured-image.jpg",
                            "width":  1600 ,
                            "height":  840 
                        }],"genre": "posts","keywords": "入门","wordcount":  163 ,
        "url": "https:\/\/kha-zix-1.gitee.io\/llm-blog\/get-started\/","datePublished": "2024-03-31T21:29:01+08:00","dateModified": "2024-04-02T09:35:38+08:00","publisher": {
            "@type": "Organization",
            "name": ""},"author": {
                "@type": "Person",
                "name": "文君逸 翁菁璟 王裕文"
            },"description": "大模型入门教学"
    }
    </script></head>
    <body data-header-desktop="fixed" data-header-mobile="auto"><script type="text/javascript">
        // Apply the dark theme before first paint: a theme saved in localStorage
        // takes precedence; with no saved value, follow the OS colour-scheme preference.
        (function () {
            var savedTheme = window.localStorage && localStorage.getItem('theme');
            var useDark = savedTheme
                ? savedTheme === 'dark'
                : window.matchMedia('(prefers-color-scheme: dark)').matches;
            if (useDark) {
                document.body.setAttribute('theme', 'dark');
            }
        })();
    </script>

        <div id="mask"></div><div class="wrapper"><header class="desktop" id="header-desktop">
    <div class="header-wrapper">
        <div class="header-title">
            <a href="/llm-blog/" title="大模型系统">大模型系统</a>
        </div>
        <div class="menu">
            <div class="menu-inner"><a class="menu-item" href="/llm-blog/posts/"> Posts </a><a class="menu-item" href="/llm-blog/tags/"> Tags </a><a class="menu-item" href="/llm-blog/categories/"> Categories </a><span class="menu-item delimiter"></span><span class="menu-item search" id="search-desktop">
                        <input type="text" placeholder="Search titles or contents..." id="search-input-desktop">
                        <a href="javascript:void(0);" class="search-button search-toggle" id="search-toggle-desktop" title="Search">
                            <i class="fas fa-search fa-fw" aria-hidden="true"></i>
                        </a>
                        <a href="javascript:void(0);" class="search-button search-clear" id="search-clear-desktop" title="Clear">
                            <i class="fas fa-times-circle fa-fw" aria-hidden="true"></i>
                        </a>
                        <span class="search-button search-loading" id="search-loading-desktop">
                            <i class="fas fa-spinner fa-fw fa-spin" aria-hidden="true"></i>
                        </span>
                    </span><a href="javascript:void(0);" class="menu-item theme-switch" title="Switch Theme">
                    <i class="fas fa-adjust fa-fw" aria-hidden="true"></i>
                </a></div>
        </div>
    </div>
</header><header class="mobile" id="header-mobile">
    <div class="header-container">
        <div class="header-wrapper">
            <div class="header-title">
                <a href="/llm-blog/" title="大模型系统">大模型系统</a>
            </div>
            <div class="menu-toggle" id="menu-toggle-mobile">
                <span></span><span></span><span></span>
            </div>
        </div>
        <div class="menu" id="menu-mobile"><div class="search-wrapper">
                    <div class="search mobile" id="search-mobile">
                        <input type="text" placeholder="Search titles or contents..." id="search-input-mobile">
                        <a href="javascript:void(0);" class="search-button search-toggle" id="search-toggle-mobile" title="Search">
                            <i class="fas fa-search fa-fw" aria-hidden="true"></i>
                        </a>
                        <a href="javascript:void(0);" class="search-button search-clear" id="search-clear-mobile" title="Clear">
                            <i class="fas fa-times-circle fa-fw" aria-hidden="true"></i>
                        </a>
                        <span class="search-button search-loading" id="search-loading-mobile">
                            <i class="fas fa-spinner fa-fw fa-spin" aria-hidden="true"></i>
                        </span>
                    </div>
                    <a href="javascript:void(0);" class="search-cancel" id="search-cancel-mobile">
                        Cancel
                    </a>
                </div><a class="menu-item" href="/llm-blog/posts/" title="">Posts</a><a class="menu-item" href="/llm-blog/tags/" title="">Tags</a><a class="menu-item" href="/llm-blog/categories/" title="">Categories</a><a href="javascript:void(0);" class="menu-item theme-switch" title="Switch Theme">
                <i class="fas fa-adjust fa-fw" aria-hidden="true"></i>
            </a></div>
    </div>
</header><div class="search-dropdown desktop">
        <div id="search-dropdown-desktop"></div>
    </div>
    <div class="search-dropdown mobile">
        <div id="search-dropdown-mobile"></div>
    </div><main class="main">
                <div class="container"><div class="toc" id="toc-auto">
            <h2 class="toc-title">Contents</h2>
            <div class="toc-content always-active" id="toc-content-auto"></div>
        </div><article class="page single"><h1 class="single-title animate__animated animate__flipInX">大模型入门教学</h1><div class="post-meta">
            <div class="post-meta-line"><span class="post-author"><a href="/llm-blog/" title="Author" rel="author" class="author"><i class="fas fa-user-circle fa-fw" aria-hidden="true"></i>文君逸 翁菁璟 王裕文</a></span>&nbsp;<span class="post-category">included in <a href="/llm-blog/categories/%E6%96%87%E6%A1%A3/"><i class="far fa-folder fa-fw" aria-hidden="true"></i>文档</a></span></div>
            <div class="post-meta-line"><i class="far fa-calendar-alt fa-fw" aria-hidden="true"></i>&nbsp;<time datetime="2024-03-31">2024-03-31</time>&nbsp;<i class="fas fa-pencil-alt fa-fw" aria-hidden="true"></i>&nbsp;163 words&nbsp;
                <i class="far fa-clock fa-fw" aria-hidden="true"></i>&nbsp;One minute&nbsp;</div>
        </div><div class="featured-image"><img
        class="lazyload"
        src="/llm-blog/svg/loading.min.svg"
        data-src="/llm-blog/get-started/featured-image.jpg"
        data-srcset="/llm-blog/get-started/featured-image.jpg, /llm-blog/get-started/featured-image.jpg 1.5x, /llm-blog/get-started/featured-image.jpg 2x"
        data-sizes="auto"
        alt="大模型入门教学"
        title="大模型入门教学" /></div><div class="details toc" id="toc-static"  data-kept="">
                <div class="details-summary toc-title">
                    <span>Contents</span>
                    <span><i class="details-icon fas fa-angle-right" aria-hidden="true"></i></span>
                </div>
                <div class="details-content toc-content" id="toc-content-static"><nav id="TableOfContents">
  <ul>
    <li><a href="#第一步-基础知识">第一步 基础知识</a></li>
    <li><a href="#第二步-相关论文">第二步 相关论文</a></li>
    <li><a href="#第三步-代码实践">第三步 代码实践</a></li>
  </ul>
</nav></div>
            </div><div class="content" id="content"><p>大模型入门教学，帮助初学者快速入门大模型</p>
<h2 id="第一步-基础知识">第一步 基础知识</h2>
<p>书本</p>
<ul>
<li><a href="https://www.zybuluo.com/hanbingtao/note/541458" target="_blank" rel="noopener noreferrer">机器学习基础知识</a></li>
<li><a href="https://openmlsys.github.io/index.html" target="_blank" rel="noopener noreferrer">《机器学习系统：设计和实现》</a></li>
</ul>
<p>视频教学</p>
<ul>
<li><a href="https://www.bilibili.com/video/BV1TD4y137mP/?spm_id_from=333.337.search-card.all.click&amp;vd_source=21c35814be47b7eb1e90d1a6473aa57d" target="_blank" rel="noopener noreferrer">李宏毅机器学习课程</a></li>
<li><a href="https://www.bilibili.com/video/BV1Hm4y1i71t/?spm_id_from=333.999.0.0&amp;vd_source=21c35814be47b7eb1e90d1a6473aa57d" target="_blank" rel="noopener noreferrer">吴恩达机器学习课程</a></li>
<li><a href="https://www.bilibili.com/video/BV13U4y1N7Uo/?spm_id_from=333.999.section.playall&amp;vd_source=21c35814be47b7eb1e90d1a6473aa57d" target="_blank" rel="noopener noreferrer">跟李沐学AI</a></li>
<li><a href="https://b23.tv/ZLPfU8J" target="_blank" rel="noopener noreferrer">CMU机器学习系统</a></li>
</ul>
<p>以上是不同的课程与教材，学习时可以相互补充。个人推荐李宏毅和李沐的课程，更适合新人一些。</p>
<h2 id="第二步-相关论文">第二步 相关论文</h2>
<p>学习大模型的经典论文：</p>
<ul>
<li><a href="https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf" target="_blank" rel="noopener noreferrer">transformer原论文</a></li>
<li><a href="https://arxiv.org/pdf/1810.04805.pdf" target="_blank" rel="noopener noreferrer">bert原论文</a></li>
<li><a href="https://www.mikecaptain.com/resources/pdf/GPT-1.pdf" target="_blank" rel="noopener noreferrer">GPT原论文（不是GPT-3，GPT-3与GPT-2更像是技术报告）</a></li>
</ul>
<p>学习以上论文，尤其是transformer，了解transformer的基本架构。</p>
<p>一些辅助视频链接：</p>
<ul>
<li><a href="https://www.bilibili.com/video/BV1pu411o7BE/?spm_id_from=333.999.0.0" target="_blank" rel="noopener noreferrer">transformer</a></li>
<li><a href="https://www.bilibili.com/video/BV1PL411M7eQ/?spm_id_from=333.999.0.0" target="_blank" rel="noopener noreferrer">bert</a></li>
<li><a href="https://www.bilibili.com/video/BV1AF411b7xQ/?spm_id_from=333.999.0.0" target="_blank" rel="noopener noreferrer">GPT</a></li>
<li><a href="https://www.youtube.com/watch?v=UPtG_38Oq8o" target="_blank" rel="noopener noreferrer">深入理解transformer(十分推荐)</a></li>
</ul>
<p>新同学在读一开始的几篇论文时可以上B站<code>跟李沐学AI</code>看论文视频，其他入门的主要论文有：</p>
<ul>
<li>Attention Is All You Need</li>
<li>Improving Language Understanding by Generative Pre-Training(GPT1)</li>
<li>BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</li>
<li>Language Models are Unsupervised Multitask Learners(GPT2)</li>
<li>Language Models are Few-Shot Learners(GPT3)</li>
<li>Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism</li>
<li>GPipe: Efficient Training of Giant Neural Networks using Pipeline Parallelism</li>
<li>PipeDream: Generalized Pipeline Parallelism for DNN Training</li>
<li>ZeRO: Memory Optimizations Toward Training Trillion Parameter Models</li>
<li>ZeRO-Offload: Democratizing Billion-Scale Model Training</li>
<li>ZeRO-Infinity: Breaking the GPU Memory Wall for Extreme Scale Deep Learning</li>
</ul>
<h2 id="第三步-代码实践">第三步 代码实践</h2>
<p>推荐一个课程，也是李沐的：</p>
<ul>
<li>
<p>主页：<a href="https://courses.d2l.ai/zh-v2/" target="_blank" rel="noopener noreferrer">https://courses.d2l.ai/zh-v2/</a></p>
</li>
<li>
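<p>每章节概念（类似PPT，但是是网页版）：<a href="https://zh-v2.d2l.ai/chapter_installation/index.html" target="_blank" rel="noopener noreferrer">https://zh-v2.d2l.ai/chapter_installation/index.html</a></p>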
</li>
<li>
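<p>课程视频：<a href="https://space.bilibili.com/1567748478/channel/seriesdetail?sid=358497" target="_blank" rel="noopener noreferrer">https://space.bilibili.com/1567748478/channel/seriesdetail?sid=358497</a></p>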
</li>
</ul>
<p>按照他的介绍进行学习即可。如果对于循环神经网络等非自注意力的神经网络不感兴趣，可以直接看该教程的自注意力部分。</p>
<p>*其他辅助内容：</p>
<p>Hugging Face是许多AI从业人员都会使用的平台，拥有大量开源模型与数据集，能极大地方便AI从业人员的工作。</p>
<ul>
<li>
<p>huggingface transformers库</p>
<p>拥有许多transformer架构的模型，如bert，gpt-2，可以很方便地下载使用。</p>
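<p>huggingface transformers库文档链接：<a href="https://huggingface.co/docs/transformers/index" target="_blank" rel="noopener noreferrer">https://huggingface.co/docs/transformers/index</a></p>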
<p>按照其上的quick tour进行transformers的安装与使用。如在本地部署一个情感分析器：</p>
<div class="highlight"><div class="chroma">
<table class="lntable"><tr><td class="lntd">
<pre tabindex="0" class="chroma"><code><span class="lnt"> 1
</span><span class="lnt"> 2
</span><span class="lnt"> 3
</span><span class="lnt"> 4
</span><span class="lnt"> 5
</span><span class="lnt"> 6
</span><span class="lnt"> 7
</span><span class="lnt"> 8
</span><span class="lnt"> 9
</span><span class="lnt">10
</span></code></pre></td>
<td class="lntd">
<pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">transformers</span> <span class="kn">import</span> <span class="n">pipeline</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># 情感分析器classifier</span>
</span></span><span class="line"><span class="cl"><span class="n">classifier</span> <span class="o">=</span> <span class="n">pipeline</span><span class="p">(</span><span class="s2">&#34;sentiment-analysis&#34;</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># 向classifier中输入文本</span>
</span></span><span class="line"><span class="cl"><span class="n">classifier</span><span class="p">(</span><span class="s2">&#34;We are very happy to show you the 🤗 Transformers library.&#34;</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Output：</span>
</span></span><span class="line"><span class="cl"><span class="c1"># [{&#39;label&#39;: &#39;POSITIVE&#39;, &#39;score&#39;: 0.9998}]</span>
</span></span></code></pre></td></tr></table>
</div>
</div></li>
</ul>
<p>阅读transformers库的源码时，主要看models文件夹。models文件夹包含transformers所支持的所有模型的Python实现；如果希望了解模型的网络结构，如BERT的网络结构，请查看models/bert/modeling_bert.py文件中对于BERT模型的网络定义。modeling_bert.py为PyTorch框架下的BERT网络实现，modeling_tf_bert.py为TensorFlow框架下的BERT网络实现，看modeling_bert.py就好，因为Hugging Face推荐使用PyTorch框架。</p></div><div class="post-footer" id="post-footer">
    <div class="post-info">
        <div class="post-info-line">
            <div class="post-info-mod">
                <span>Updated on 2024-04-02</span>
            </div></div>
        <div class="post-info-line">
            <div class="post-info-md"><span>
                            <a class="link-to-markdown" href="/llm-blog/get-started/index.md" target="_blank">Read Markdown</a>
                        </span></div>
            <div class="post-info-share">
                <span><a href="javascript:void(0);" title="Share on Twitter" data-sharer="twitter" data-url="https://kha-zix-1.gitee.io/llm-blog/get-started/" data-title="大模型入门教学" data-via="xxxx" data-hashtags="入门"><i class="fab fa-twitter fa-fw" aria-hidden="true"></i></a><a href="javascript:void(0);" title="Share on Facebook" data-sharer="facebook" data-url="https://kha-zix-1.gitee.io/llm-blog/get-started/" data-hashtag="入门"><i class="fab fa-facebook-square fa-fw" aria-hidden="true"></i></a><a href="javascript:void(0);" title="Share on Hacker News" data-sharer="hackernews" data-url="https://kha-zix-1.gitee.io/llm-blog/get-started/" data-title="大模型入门教学"><i class="fab fa-hacker-news fa-fw" aria-hidden="true"></i></a><a href="javascript:void(0);" title="Share on Line" data-sharer="line" data-url="https://kha-zix-1.gitee.io/llm-blog/get-started/" data-title="大模型入门教学"><i data-svg-src="/llm-blog/lib/simple-icons/icons/line.min.svg" aria-hidden="true"></i></a><a href="javascript:void(0);" title="Share on 微博" data-sharer="weibo" data-url="https://kha-zix-1.gitee.io/llm-blog/get-started/" data-title="大模型入门教学"><i class="fab fa-weibo fa-fw" aria-hidden="true"></i></a></span>
            </div>
        </div>
    </div>

    <div class="post-info-more">
        <section class="post-tags"><i class="fas fa-tags fa-fw" aria-hidden="true"></i>&nbsp;<a href="/llm-blog/tags/%E5%85%A5%E9%97%A8/">入门</a></section>
        <section>
            <span><a href="javascript:void(0);" onclick="window.history.back();">Back</a></span>&nbsp;|&nbsp;<span><a href="/llm-blog/">Home</a></span>
        </section>
    </div>

    <div class="post-nav"></div>
</div>
</article></div>
            </main><footer class="footer">
        <div class="footer-container"><div class="footer-line">Powered by <a href="https://gohugo.io/" target="_blank" rel="noopener noreferrer" title="Hugo 0.124.1">Hugo</a> | Theme - <a href="https://github.com/dillonzq/LoveIt" target="_blank" rel="noopener noreferrer" title="LoveIt 0.2.11"><i class="far fa-kiss-wink-heart fa-fw" aria-hidden="true"></i> LoveIt</a>
                </div><div class="footer-line" itemscope itemtype="http://schema.org/CreativeWork"><i class="far fa-copyright fa-fw" aria-hidden="true"></i><span itemprop="copyrightYear">2019 - 2024</span><span class="author" itemprop="copyrightHolder">&nbsp;<a href="/llm-blog/" target="_blank">陈武辉</a></span>&nbsp;|&nbsp;<span class="license"><a rel="license external nofollow noopener noreferrer" href="https://creativecommons.org/licenses/by-nc/4.0/" target="_blank">CC BY-NC 4.0</a></span></div>
        </div>
    </footer></div>

        <div id="fixed-buttons"><a href="#" id="back-to-top" class="fixed-button" title="Back to Top">
                <i class="fas fa-arrow-up fa-fw" aria-hidden="true"></i>
            </a><a href="#" id="view-comments" class="fixed-button" title="View Comments">
                <i class="fas fa-comment fa-fw" aria-hidden="true"></i>
            </a>
        </div>
        <!-- Late-loaded stylesheets for the optional widgets (gallery, math, cookie banner). -->
        <link rel="stylesheet" href="/llm-blog/lib/lightgallery/css/lightgallery-bundle.min.css">
        <link rel="stylesheet" href="/llm-blog/lib/katex/katex.min.css">
        <link rel="stylesheet" href="/llm-blog/lib/cookieconsent/cookieconsent.min.css">
        <!-- Third-party libraries; kept in their original order and loaded before theme.min.js. -->
        <script type="text/javascript" src="/llm-blog/lib/autocomplete/autocomplete.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/lunr/lunr.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/lazysizes/lazysizes.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/lightgallery/lightgallery.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/lightgallery/plugins/thumbnail/lg-thumbnail.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/lightgallery/plugins/zoom/lg-zoom.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/clipboard/clipboard.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/sharer/sharer.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/katex/katex.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/katex/contrib/auto-render.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/katex/contrib/copy-tex.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/katex/contrib/mhchem.min.js"></script>
        <script type="text/javascript" src="/llm-blog/lib/cookieconsent/cookieconsent.min.js"></script>
        <!--
            Page-level settings exposed as window.config (presumably read by theme.min.js,
            which is loaded immediately afterwards; confirm against the theme source).
            The object literal must stay free of raw line breaks inside string values:
            an unescaped newline in a quoted string is a JavaScript SyntaxError and would
            leave window.config undefined.
        -->
        <script type="text/javascript">window.config={"code":{"copyTitle":"Copy to clipboard","maxShownLines":50},"comment":{},"cookieconsent":{"content":{"dismiss":"Got it!","link":"Learn more","message":"This website uses Cookies to improve your experience."},"enable":true,"palette":{"button":{"background":"#f0f0f0"},"popup":{"background":"#1aa3ff"}},"theme":"edgeless"},"lightgallery":true,"math":{"delimiters":[{"display":true,"left":"$$","right":"$$"},{"display":true,"left":"\\[","right":"\\]"},{"display":true,"left":"\\begin{equation}","right":"\\end{equation}"},{"display":true,"left":"\\begin{equation*}","right":"\\end{equation*}"},{"display":true,"left":"\\begin{align}","right":"\\end{align}"},{"display":true,"left":"\\begin{align*}","right":"\\end{align*}"},{"display":true,"left":"\\begin{alignat}","right":"\\end{alignat}"},{"display":true,"left":"\\begin{alignat*}","right":"\\end{alignat*}"},{"display":true,"left":"\\begin{gather}","right":"\\end{gather}"},{"display":true,"left":"\\begin{CD}","right":"\\end{CD}"},{"display":false,"left":"$","right":"$"},{"display":false,"left":"\\(","right":"\\)"}],"strict":false},"search":{"highlightTag":"em","lunrIndexURL":"/llm-blog/index.json","maxResultLength":10,"noResultsFound":"No results found","snippetLength":30,"type":"lunr"}};</script>
        <script type="text/javascript" src="/llm-blog/js/theme.min.js"></script></body>
</html>
